載入packages
library("Seurat")
## Loading required package: SeuratObject
## Loading required package: sp
## 'SeuratObject' was built under R 4.3.0 but the current version is
## 4.3.2; it is recomended that you reinstall 'SeuratObject' as the ABI
## for R may have changed
## 'SeuratObject' was built with package 'Matrix' 1.6.3 but the current
## version is 1.6.4; it is recomended that you reinstall 'SeuratObject' as
## the ABI for 'Matrix' may have changed
##
## Attaching package: 'SeuratObject'
## The following object is masked from 'package:base':
##
## intersect
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("magrittr")
將單一樣本資料前處理,寫成一個function(根據老師上課的模板與paper給予的參數調整) (1)讀檔 (matrix.mtx, barcode.tsv, and feature.tsv) (2)去除duplicated genes (3)資料載入seurat (4)計算mito gene的比例 (5)Cell QC: 複雜度500~6000,至少三顆細胞表達,粒線體基因不超過20% (6)Normalization: scaled to 10000 and log transformed (7)Identify highly variable genes
createSeuratObjEachSample <- function(project.name,
dir,
min.cells = 3, # paper
min.features = 500, # paper
max.features = 6000, # paper
assay = "RNA",
max.mt.percentage = 20, # paper
num.hvf = 2000,
mt.pattern = "^mt-",
normalization.method = "LogNormalize", # paper
hvf.selection.method = "vst"){
#### (1) Read files
cat(c("Read files\n"))
barcode.path <- file.path(dir, "barcodes.tsv")
features.path <- file.path(dir, "genes.tsv")
matrix.path <- file.path(dir, "matrix.mtx")
mat <- Matrix::readMM(file = matrix.path)
feature.names = read.delim(features.path,
header = FALSE,
stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path,
header = FALSE,
stringsAsFactors = FALSE)
colnames(mat) = paste(project.name, barcode.names$V1, sep = "_")
rownames(mat) = feature.names$V2
mat <- as.matrix(mat)
cat(c("-> matrix:", dim(mat), "\n"))
#### (2) 將基因名子一樣的資料加總起來
cat(c("Remove duplicated genes\n"))
IDfreqs <- table(rownames(mat))
IDfreqs <- IDfreqs[IDfreqs > 1]
cat(c("-> Num. dup. genes:", length(IDfreqs), "\n"))
if (length(IDfreqs) > 0){
for (i in names(IDfreqs)) {
index <- which(rownames(mat) == i)
duplicates <- as.matrix(mat[index, ])
sums <- apply(duplicates, 2, sum, na.rm=TRUE)
### 置換成加總數值
mat[index[1],] <- sums
### 將重複的資料移除
for (j in length(index):2) {
mat <- mat[-index[j], ]
}
}
}
cat(c("-> matrix:", dim(mat), "\n"))
#### (3) 資料載入seurat
cat(c("Create Seurat Object\n"))
seurat.obj <- Seurat::CreateSeuratObject(counts = mat,
project = project.name,
min.cells = min.cells,
min.features = min.features,
assay = assay,
meta.data = NULL)
cat(c("-> dimension:", dim(seurat.obj), "\n"))
#### (4) 計算mito gene的比例
cat(c("Calculate MT percentage\n"))
seurat.obj[["Mito_percent"]] <- Seurat::PercentageFeatureSet(seurat.obj, pattern = mt.pattern)
#### (5) Cell QC
cat(c("Cell QC\n"))
min_nFeature <- min.features
max_nFeature <- max.features
max_Mito_percent <- max.mt.percentage
seurat.obj <- seurat.obj %>%
subset(., subset = nFeature_RNA > min_nFeature & nFeature_RNA < max_nFeature) %>%
subset(., subset = Mito_percent < max_Mito_percent) # paper
cat(c("-> dimension:", dim(seurat.obj), "\n"))
#### (6) Normalization
cat(c("Normalization\n"))
seurat.obj <- Seurat::NormalizeData(seurat.obj,
normalization.method = normalization.method,
scale.factor = 10000) # paper
#### (7) Identify highly variable genes
cat(c("High variable genes\n"))
seurat.obj <- Seurat::FindVariableFeatures(seurat.obj,
selection.method = hvf.selection.method,
nfeatures = num.hvf
)
return(seurat.obj)
}
1.將每個樣本建構出Seurat object
k_seurat <- createSeuratObjEachSample (project.nam = "k",
dir = "K",
min.cells = 3,
min.features = 500,
max.features = 6000,
assay = "RNA",
max.mt.percentage = 20,
num.hvf = 2000,
mt.pattern = "^mt-",
normalization.method = "LogNormalize",
hvf.selection.method = "vst")
## Read files
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 1.0 GiB
## -> matrix: 20304 6696
## Remove duplicated genes
## -> Num. dup. genes: 0
## -> matrix: 20304 6696
## Create Seurat Object
## Warning: Data is of class matrix. Coercing to dgCMatrix.
## -> dimension: 18738 6696
## Calculate MT percentage
## Cell QC
## -> dimension: 18738 6696
## Normalization
## Normalizing layer: counts
## High variable genes
## Finding variable features for layer counts
kl_seurat <- createSeuratObjEachSample (project.nam = "kl",
dir = "KL",
min.cells = 3,
min.features = 500,
max.features = 6000,
assay = "RNA",
max.mt.percentage = 20,
num.hvf = 2000,
mt.pattern = "^mt-",
normalization.method = "LogNormalize",
hvf.selection.method = "vst")
## Read files
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 1.1 GiB
## -> matrix: 20304 7564
## Remove duplicated genes
## -> Num. dup. genes: 0
## -> matrix: 20304 7564
## Create Seurat Object
## Warning: Data is of class matrix. Coercing to dgCMatrix.
## -> dimension: 19458 7564
## Calculate MT percentage
## Cell QC
## -> dimension: 19458 7564
## Normalization
## Normalizing layer: counts
## High variable genes
## Finding variable features for layer counts
觀察兩個Seurat object
VlnPlot(k_seurat, features = c("nFeature_RNA", "nCount_RNA", "Mito_percent"), ncol = 3)
VlnPlot(kl_seurat, features = c("nFeature_RNA", "nCount_RNA", "Mito_percent"), ncol = 3)
data.list <- list("k" = k_seurat, "kl" = kl_seurat)
# select features that are repeatedly variable across datasets for integration
features <- Seurat::SelectIntegrationFeatures(object.list = data.list,
nfeatures = 2000,
verbose = FALSE)
# identify anchors
anchors <- Seurat::FindIntegrationAnchors(object.list = data.list,
anchor.features = features,
verbose = F)
# creates 'integrated' data assay
combined.obj <- Seurat::IntegrateData(anchorset = anchors)
## Merging dataset 1 into 2
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
DefaultAssay(combined.obj) <- "integrated"
dim(combined.obj)
## [1] 2000 14260
combined.obj@meta.data$orig.ident %>% table()
## .
## k kl
## 6696 7564
combined.obj@meta.data %>% head()
## orig.ident nCount_RNA nFeature_RNA Mito_percent
## k_AAACCCAAGACAAGCC k 5388 1842 3.0252413
## k_AAACCCAAGATCGACG k 8021 2329 2.9173420
## k_AAACCCAAGGCTGTAG k 7292 2613 2.6330225
## k_AAACCCACAATAGAGT k 2706 912 0.4065041
## k_AAACCCACAGATTAAG k 4832 1579 3.7251656
## k_AAACCCAGTATGGTAA k 8292 2217 3.3043898
combined.obj <- Seurat::ScaleData(combined.obj, verbose = FALSE)
combined.obj <- Seurat::RunPCA(combined.obj, npcs = 50, verbose = FALSE)
ElbowPlot(combined.obj, ndims = 50)
pcs <- 20 # 肉眼判斷為20(符合paper作法)
visualizing both cells and features that define the PCA
VizDimLoadings(combined.obj, dims = 1:2, reduction = "pca")
DimPlot(combined.obj, reduction = "pca") + NoLegend()
DimHeatmap(combined.obj, dims = 1, cells = 500, balanced = TRUE)
combined.obj <- combined.obj %>%
Seurat::RunUMAP(reduction = "pca", umap.method = "uwot", dims = 1:pcs, verbose = FALSE) %>%
Seurat::RunTSNE(reduction="pca", dims= 1:pcs, verbose = FALSE) %>%
Seurat::FindNeighbors(reduction = "pca", dims = 1:pcs, verbose = FALSE) %>%
Seurat::FindClusters(resolution = 0.06, verbose = FALSE) %>% # paper提供resolution = 0.06
Seurat::FindSubCluster("0",resolution = 0.06, graph.name = "integrated_snn",
subcluster.name = "new_cluster") # T cell進一步subcluster
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4739
## Number of edges: 192814
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9576
## Number of communities: 2
## Elapsed time: 0 seconds
觀察cluster狀況
Seurat::DimPlot(combined.obj, reduction="umap")
Seurat::DimPlot(combined.obj, reduction="tsne")
Seurat::DimPlot(combined.obj, reduction="tsne", group.by = "orig.ident")
Seurat::DimPlot(combined.obj, reduction="umap", group.by = "new_cluster")
觀察Feature狀況
FeaturePlot(combined.obj, reduction="umap", features = "Mito_percent")
markers <- FindAllMarkers(combined.obj, only.pos = TRUE) %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 1) %>%
dplyr::arrange(desc(avg_log2FC)) %>%
slice_head(n = 10) %>%
ungroup() -> top10
## Calculating cluster 0
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 1
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 2
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 3
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 4
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Warning in mean.fxn(object[features, cells.2, drop = FALSE]): NaNs produced
## Calculating cluster 5
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 6
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 7
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 8
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 9
# 將找到的marker進行視覺化,作heatmap
Seurat::DoHeatmap(combined.obj, group.by = "ident", features = top10$gene,draw.lines = TRUE) + NoLegend()
head(markers)
## # A tibble: 6 × 7
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 8.33e- 63 5.01 0.155 0.061 1.67e- 59 0 Il17f
## 2 4.75e-180 4.99 0.219 0.041 9.49e-177 0 Cd163l1
## 3 0 4.71 0.465 0.135 0 0 Dapl1
## 4 1.75e-127 4.68 0.134 0.036 3.49e-124 0 Trgv2
## 5 0 4.63 0.121 0.002 0 0 Trav4-2
## 6 3.39e- 62 4.46 0.186 0.091 6.78e- 59 0 Tcrg-C1
可以將特定gene作圖
FeaturePlot(combined.obj, reduction="umap", features = "Cd19", label = T)
## Warning: Could not find Cd19 in the default search locations, found in 'RNA'
## assay instead
VlnPlot(combined.obj, features = c("Cd19", "Cd3e"))
## Warning: Could not find Cd19 in the default search locations, found in 'RNA'
## assay instead
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead
6.annotation(from paper)
b_cell_mark <- c("Cd19","Cd22","Cd79a","Cd79b")
cancer_mark <- c("Epcam","Krt18","Krt8","Krt19")
dendritic_mark <- c("Cd86","Cd83","Clec10a","Ccl22","Cxcl16","Xcr1","Rab7b","Naaa","Btla")
endo_mark <- c("Pecam1","Tek","Plvap","Thbd","Tmem100","Adgrf5","Podxl","Nostrin","Acvrl1")
macrophage_mark <- c("Adgre1","Itgam","Itgal","Cd14","Ccl9","F13a1","Cx3cr1","Cd68")
neutrophil_mark <- c("Csf3r","Cxcr2S100a8")
nk_mark <- c("Klrd1","Nkg7","Prf1","Gzma","Gzmb","Ccl5","Cma1","Klre1","Klra4")
plasma_dend_mark <- c("Ccr9","Bst2")
t_cell_mark <- c("Cd3d","Cd5","Cd3e")
觀察cell cluster marker是否能夠代表該cluster
FeaturePlot(combined.obj, reduction="umap", features = b_cell_mark[1:4], label = T)
## Warning: Could not find Cd19 in the default search locations, found in 'RNA'
## assay instead
## Warning: Could not find Cd22 in the default search locations, found in 'RNA'
## assay instead
FeaturePlot(combined.obj, reduction="umap", features = cancer_mark[1:4], label = T)
FeaturePlot(combined.obj, reduction="umap", features = dendritic_mark[1:4], label = T)
FeaturePlot(combined.obj, reduction="umap", features = endo_mark[1:4], label = T)
FeaturePlot(combined.obj, reduction="umap", features = macrophage_mark[1:4], label = T)
## Warning: Could not find Itgal in the default search locations, found in 'RNA'
## assay instead
FeaturePlot(combined.obj, reduction="umap", features = neutrophil_mark, label = T)
## Warning: The following requested variables were not found: Cxcr2S100a8
FeaturePlot(combined.obj, reduction="umap", features = nk_mark[1:4], label = T)
FeaturePlot(combined.obj, reduction="umap", features = plasma_dend_mark, label = T)
# T cell
p1 <- FeaturePlot(combined.obj, reduction="umap", features = "Cd3e", label = T) # T cell
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead
p2 <- FeaturePlot(combined.obj, reduction="umap", features = "Tcf7", label = T) # activated T
## Warning: Could not find Tcf7 in the default search locations, found in 'RNA'
## assay instead
p3 <- FeaturePlot(combined.obj, reduction="umap", features = "Ctla4", label = T) # exhausted T
cowplot::plot_grid(p1, p2, p3, ncol = 3)
VlnPlot(combined.obj, features = c("Cd3e","Tcf7","Ctla4"))
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead
## Warning: Could not find Tcf7 in the default search locations, found in 'RNA'
## assay instead
cluster annotation(圖1d)
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "0_0"] <- "T cell activated"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "0_1"] <- "T cell exhausted"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "1"] <- "Endothelial cell"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "2"] <- "Macrophage"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "3"] <- "B cell"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "4"] <- "Neutrophils"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "5"] <- "dendritic cell"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "6"] <- "NK cell"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "7"] <- "cancer cell1"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "8"] <- "cancer cell2"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "9"] <- "Plasmacytoid dendritic cell"
combined.obj <- SetIdent(combined.obj, value = combined.obj@meta.data$`new_cluster`)
# 查看annotation後的umap (圖1d)
Seurat::DimPlot(combined.obj, reduction="umap", label = T) # 圖1d
7.visualization Ridge plots - from ggridges. Visualize single cell expression distributions in each cluster
features <- c("Cd3e","Ctla4")
RidgePlot(combined.obj, features = features, ncol = 1)
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead
## Picking joint bandwidth of 0.0237
## Picking joint bandwidth of 0.022
Violin plot - Visualize single cell expression distributions in each cluster
VlnPlot(combined.obj, features = features, ncol = 2)
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead
VlnPlot(combined.obj, features = features, split.by = "orig.ident")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead
FeaturePlot
FeaturePlot(combined.obj, features = features)
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead
FeaturePlot(combined.obj, features = features, reduction = "umap",blend = TRUE)
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead
FeaturePlot(combined.obj, features = features, split.by = "orig.ident")
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead
DotPlot -dot size corresponds to the percentage of cells expressing the feature
DotPlot(combined.obj, features = features) + RotatedAxis()
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead
Heatmap
# Single cell heatmap of feature expression
DoHeatmap(subset(combined.obj, downsample = 100), features = top10$gene, size = 3) + NoLegend()
每個細胞的marker用dotplot做圖
# Identify conserved cell type markers
sg <- list(bcell = c("Cd19","Cd22","Cd79a","Cd79b"),
cancer = c("Epcam","Krt18","Krt8","Krt19"),
dendritic = c("Cd86","Cd83","Clec10a","Ccl22","Cxcl16","Xcr1","Rab7b","Naaa","Btla"),
endo = c("Pecam1","Tek","Plvap","Thbd","Tmem100","Adgrf5","Podxl","Nostrin","Acvrl1"),
macrophage = c("Adgre1","Itgam","Itgal","Cd14","Ccl9","F13a1","Cx3cr1","Cd68"),
neutro= c("Csf3r","Cxcr2S100a8"),
nkcell = c("Klrd1","Nkg7","Prf1","Gzma","Gzmb","Ccl5","Cma1","Klre1","Klra4"),
plasma_dend = c("Ccr9","Bst2"),
tcell = c("Cd3d","Cd5","Cd3e"))
DotPlot(combined.obj, features = sg, cols = c("blue", "red"), dot.scale = 4, assay = "RNA") +
RotatedAxis()
## Warning: The following requested variables were not found: Cxcr2S100a8
## Warning: The `facets` argument of `facet_grid()` is deprecated as of ggplot2 2.2.0.
## ℹ Please use the `rows` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
8.計算細胞種類數量,並創建 bar plot(圖1f)
cell_proportions <- combined.obj@meta.data[,c(1,7)]
cell_ratio <- table(cell_proportions) %>% t() %>% as.data.frame()
k_total_cell <- cell_ratio$Freq[1:11] %>% sum()
kl_total_cell <- cell_ratio$Freq[12:22] %>% sum()
k_table <- cell_ratio[1:10,]
kl_table <- cell_ratio[11:22,]
k_table$ratio <- k_table$Freq/k_total_cell
kl_table$ratio <- kl_table$Freq/kl_total_cell
final <- rbind(k_table,kl_table) %>% set_names(c("cluster","ident","freq","ratio"))
head(final)
## cluster ident freq ratio
## 1 B cell k 442 0.066009558
## 2 cancer cell1 k 313 0.046744325
## 3 cancer cell2 k 25 0.003733572
## 4 dendritic cell k 230 0.034348865
## 5 Endothelial cell k 1194 0.178315412
## 6 Macrophage k 1236 0.184587814
library(ggplot2)
p <- ggplot(data = final, aes(x = ratio, y = ident, fill = cluster)) +
geom_bar(stat = "identity") + theme_void()
p # 圖1f
創建T cell exhausted violin plot(圖1g)
exh_T <- combined.obj %>%
subset(., subset = new_cluster == "T cell exhausted")
exh_T <- SetIdent(exh_T, value = exh_T@meta.data$orig.ident)
VlnPlot(exh_T, features = "Ctla4") # 圖1g
9.抓出T cell進行分析
act_T <- combined.obj %>%
subset(., subset = new_cluster == "T cell activated")
act_T <- SetIdent(act_T, value = act_T@meta.data$orig.ident)
t_de_gene <- FindAllMarkers(act_T) %>% filter(cluster == "kl")
## Calculating cluster k
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster kl
## Warning in mean.fxn(object[features, cells.2, drop = FALSE]): NaNs produced
head(t_de_gene)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## Egr3.1 0 -0.9327606 0.032 0.940 0 kl Egr3
## Thbs1.1 0 -0.7934839 0.039 0.914 0 kl Thbs1
## Mef2c.1 0 -0.3741167 0.022 0.897 0 kl Mef2c
## Ager.1 0 -0.4077776 0.023 0.889 0 kl Ager
## Afap1l1.1 0 -1.4697333 0.011 0.876 0 kl Afap1l1
## Alcam.1 0 -0.4896868 0.030 0.887 0 kl Alcam
創建T cell activated violin plot(圖1g)
VlnPlot(act_T, features = "Tcf7") # 圖1g
## Warning: Could not find Tcf7 in the default search locations, found in 'RNA'
## assay instead
創建基因list,並按照avg_log2FC由大到小排序
library(clusterProfiler)
##
## clusterProfiler v4.10.0 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
##
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:stats':
##
## filter
library(enrichplot)
organism <- "org.Mm.eg.db"
list1 <- t_de_gene$avg_log2FC
names(list1) <- t_de_gene$gene
gsea_list_t <- na.omit(list1) %>%
sort(., decreasing = TRUE)
head(gsea_list_t)
## Pkib F630028O10Rik Fscn1 Cdc42bpa Prdm1
## 8.402034 5.632552 5.529738 5.287390 5.254809
## Hbb-bt
## 4.549129
將activated T cell基因表現量進行GSEA分析
t_gse <- clusterProfiler::gseGO(geneList = gsea_list_t,
ont = "ALL", # "BP", "MF", "CC"
keyType = "SYMBOL",
nPerm = 10000,
minGSSize = 3,
maxGSSize = 800,
pvalueCutoff = 0.05,
verbose = TRUE,
OrgDb = organism,
pAdjustMethod = "none")
##
## preparing geneSet collections...
## GSEA analysis...
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## leading edge analysis...
## done...
創建activated T cell的GSEA dotplot(圖1h)
dotplot(t_gse, split=".sign") + facet_grid(~.sign) # 圖1h
10.用subset將cancer cell population從combined.obj抓出來,並將k與kl基因表現量進行分析
cancer <- combined.obj %>%
subset(., subset = new_cluster == c("cancer cell1","cancer cell2"))
cancer <- SetIdent(cancer, value = cancer@meta.data$orig.ident)
c_de_gene <- FindAllMarkers(cancer) %>% filter(cluster == "kl")
## Calculating cluster k
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster kl
## Warning in mean.fxn(object[features, cells.2, drop = FALSE]): NaNs produced
head(c_de_gene)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## Btla.1 1.015995e-87 -2.2690035 0.007 0.472 2.031990e-84 kl Btla
## Bank1.1 1.020991e-87 -1.5972041 0.011 0.491 2.041982e-84 kl Bank1
## Irf4.1 3.398922e-83 -0.8570328 0.021 0.857 6.797844e-80 kl Irf4
## Ccl8.1 2.221741e-82 0.1315052 0.014 0.534 4.443481e-79 kl Ccl8
## Incenp.1 6.105497e-82 0.1424943 0.025 0.199 1.221099e-78 kl Incenp
## Knstrn.1 1.067041e-81 -0.2108657 0.025 0.832 2.134083e-78 kl Knstrn
將cancer cell基因表現量進行GSEA分析
organism <- "org.Mm.eg.db"
list2 <- c_de_gene$avg_log2FC
names(list2) <- c_de_gene$gene
gsea_list_c <- na.omit(list2) %>%
sort(., decreasing = TRUE)
c_gse <- clusterProfiler::gseGO(geneList = gsea_list_c,
ont = "ALL", # "BP", "MF", "CC"
keyType = "SYMBOL",
nPerm = 10000,
minGSSize = 3,
maxGSSize = 800,
pvalueCutoff = 0.05,
verbose = TRUE,
OrgDb = organism,
pAdjustMethod = "none")
## preparing geneSet collections...
## GSEA analysis...
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in fgseaSimple(pathways = pathways, stats = stats, minSize = minSize, :
## There were 12 pathways for which P-values were not calculated properly due to
## unbalanced gene-level statistic values
## leading edge analysis...
## done...
創建cancer cell的GSEA dotplot(圖2a)
dotplot(c_gse, split=".sign") + facet_grid(~.sign) # 圖2a
11.use down regulated gene list(act_T cell)
t_down_gene <- FindAllMarkers(act_T) %>% filter(avg_log2FC < 0 & cluster == "kl")
## Calculating cluster k
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster kl
## Warning in mean.fxn(object[features, cells.2, drop = FALSE]): NaNs produced
head(t_down_gene)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## Egr3.1 0 -0.9327606 0.032 0.940 0 kl Egr3
## Thbs1.1 0 -0.7934839 0.039 0.914 0 kl Thbs1
## Mef2c.1 0 -0.3741167 0.022 0.897 0 kl Mef2c
## Ager.1 0 -0.4077776 0.023 0.889 0 kl Ager
## Afap1l1.1 0 -1.4697333 0.011 0.876 0 kl Afap1l1
## Alcam.1 0 -0.4896868 0.030 0.887 0 kl Alcam
down_t <- t_down_gene$avg_log2FC
names(down_t) <- t_down_gene$gene
down_list_t <- na.omit(down_t) %>%
sort(., decreasing = FALSE)
go_t <- clusterProfiler::enrichGO(gene = names(down_list_t),
OrgDb = "org.Mm.eg.db",
keyType = "SYMBOL",
ont = "ALL")
head(as.data.frame(go_t))
## ONTOLOGY ID Description GeneRatio
## GO:0050900 BP GO:0050900 leukocyte migration 62/649
## GO:0006935 BP GO:0006935 chemotaxis 62/649
## GO:0042330 BP GO:0042330 taxis 62/649
## GO:0050727 BP GO:0050727 regulation of inflammatory response 56/649
## GO:0097529 BP GO:0097529 myeloid leukocyte migration 44/649
## GO:0030198 BP GO:0030198 extracellular matrix organization 48/649
## BgRatio pvalue p.adjust qvalue
## GO:0050900 398/28891 1.150597e-33 5.746083e-30 2.937051e-30
## GO:0006935 462/28891 6.668120e-30 1.419956e-26 7.257959e-27
## GO:0042330 464/28891 8.529972e-30 1.419956e-26 7.257959e-27
## GO:0050727 411/28891 1.995412e-27 2.491272e-24 1.273388e-24
## GO:0097529 253/28891 3.414838e-26 3.410740e-23 1.743364e-23
## GO:0030198 322/28891 2.151251e-25 1.765610e-22 9.024733e-23
## geneID
## GO:0050900 Pf4/Ppbp/Ednra/Il17a/Nkx2-3/Itga2b/Cadm1/Tnfsf11/Mmp2/Ccr1/Ccr2/Ccl17/Apod/Cd177/Ccr6/Mmp14/Thbs1/Itga9/Tnfaip6/C3ar1/Cxcr2/Cxcl3/Ccl21a/Cd200/Kit/Sele/Lbp/Ch25h/Serpine1/Aif1/Cxcl1/S100a14/Ccl20/Cd9/Sirpa/Pla2g7/Ccl7/Cd34/Ager/Il33/Gata3/Edn1/Fcer1g/Tnf/Mmp9/Tlr2/Cd24a/Fpr2/Cxcl14/Vegfa/Podxl/Il23a/S100a8/Flt1/Trem2/Gpr183/Lgals3/Slc12a2/Tnfrsf18/Ccr7/Plvap/Fcgr3
## GO:0006935 Pf4/Ppbp/Ednra/Ccr10/Tnfsf11/Mmp2/Ccr1/Ccr2/Ccl17/Egr3/Ccr6/Nrg1/Sema3e/Thbs1/Itga9/Tnfaip6/C3ar1/Cxcr2/Cxcl3/Sema3d/Ccl21a/Saa3/Ccr4/Kit/Lbp/Ch25h/Serpine1/Aif1/Eng/Slit3/Cxcl1/Ear10/S100a14/Ccl20/Pla2g7/Ccl7/Ager/Sema6a/Edn1/Fcer1g/Mmp9/Nrp2/Lox/Sema6d/Fpr2/Cxcl14/Ryk/Vegfa/Bmpr2/Il23a/Cdh13/S100a8/Flt1/Trem2/Ear1/Gpr183/Lrp1/Lgals3/Slc12a2/Ccr7/Nrp1/Fcgr3
## GO:0042330 Pf4/Ppbp/Ednra/Ccr10/Tnfsf11/Mmp2/Ccr1/Ccr2/Ccl17/Egr3/Ccr6/Nrg1/Sema3e/Thbs1/Itga9/Tnfaip6/C3ar1/Cxcr2/Cxcl3/Sema3d/Ccl21a/Saa3/Ccr4/Kit/Lbp/Ch25h/Serpine1/Aif1/Eng/Slit3/Cxcl1/Ear10/S100a14/Ccl20/Pla2g7/Ccl7/Ager/Sema6a/Edn1/Fcer1g/Mmp9/Nrp2/Lox/Sema6d/Fpr2/Cxcl14/Ryk/Vegfa/Bmpr2/Il23a/Cdh13/S100a8/Flt1/Trem2/Ear1/Gpr183/Lrp1/Lgals3/Slc12a2/Ccr7/Nrp1/Fcgr3
## GO:0050727 Il17a/Pde5a/Enpp3/Dnase1l3/Ighg1/Tnfsf11/C2cd4b/Ccr2/Il6/Fcer1a/Cma1/Ctla2a/Fabp4/Lilra5/Fcgr2b/H2-M2/Wfdc1/Tnfaip6/Cd200r3/Fcgr1/Cd200/Ptgs2/Lbp/Ptges/Il10/Gpx2/Serpine1/Mmp8/Pla2g2d/Hyal2/Sirpa/Slc7a2/Ighg2b/Igf1/Slc39a8/Ager/C3/Il33/Gata3/Fcer1g/Tnf/Tlr2/Cd24a/Fpr2/Mefv/Lta/Ier3/S100a8/Trem2/Pbk/Tnc/Ccr7/Cebpa/Lgals1/Mgll/Fcgr3
## GO:0097529 Pf4/Ppbp/Ednra/Il17a/Tnfsf11/Mmp2/Ccr1/Ccr2/Ccl17/Cd177/Mmp14/Thbs1/Itga9/Tnfaip6/C3ar1/Cxcr2/Cxcl3/Ccl21a/Cd200/Kit/Lbp/Serpine1/Aif1/Cxcl1/S100a14/Ccl20/Cd9/Sirpa/Pla2g7/Ccl7/Ager/Edn1/Fcer1g/Mmp9/Tlr2/Fpr2/Vegfa/Il23a/S100a8/Flt1/Trem2/Lgals3/Ccr7/Fcgr3
## GO:0030198 Aebp1/Colq/Loxl1/Sfrp2/Mmp2/Col4a4/Tnfrsf11b/Il6/Ecm2/Spock2/Cav2/Cma1/Col4a2/Col4a1/Col5a2/Mmp14/Ddr2/Col14a1/Rgcc/Adamts9/Eng/Adamtsl2/Ccdc80/Mmp8/Fbln5/Ramp2/Slc39a8/Col5a1/Col1a1/Loxl2/Tnf/Egfl6/Mmp9/Serpinh1/Lox/Has1/Tgfbi/Col4a5/Itga8/Cst3/Pxdn/Eln/Adamts1/Npnt/Lgals3/Mmp19/Ndnf/Gfod2
## Count
## GO:0050900 62
## GO:0006935 62
## GO:0042330 62
## GO:0050727 56
## GO:0097529 44
## GO:0030198 48
barplot(go_t, showCategory = 20, # 圖1h
title = "Top20 GO pathways downregulated in activated T cells of KL vs. K",
label_format = 60)
12.use down regulated gene list(cancer cell)
c_down_gene <- FindAllMarkers(cancer) %>% filter(avg_log2FC < 0 & cluster == "kl")
## Calculating cluster k
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster kl
## Warning in mean.fxn(object[features, cells.2, drop = FALSE]): NaNs produced
head(c_down_gene)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## Btla.1 1.015995e-87 -2.2690035 0.007 0.472 2.031990e-84 kl Btla
## Bank1.1 1.020991e-87 -1.5972041 0.011 0.491 2.041982e-84 kl Bank1
## Irf4.1 3.398922e-83 -0.8570328 0.021 0.857 6.797844e-80 kl Irf4
## Knstrn.1 1.067041e-81 -0.2108657 0.025 0.832 2.134083e-78 kl Knstrn
## Lmo1.1 4.653159e-81 -2.1982336 0.018 0.814 9.306317e-78 kl Lmo1
## Dcstamp.1 7.250898e-81 -0.8819352 0.014 0.957 1.450180e-77 kl Dcstamp
down_c <- c_down_gene$avg_log2FC
names(down_c) <- c_down_gene$gene
down_list_c <- na.omit(down_c) %>%
sort(., decreasing = FALSE)
go_c <- clusterProfiler::enrichGO(gene = names(down_list_c),
OrgDb = "org.Mm.eg.db",
keyType = "SYMBOL",
ont = "BP")
head(as.data.frame(go_c))
## ID Description GeneRatio
## GO:0050900 GO:0050900 leukocyte migration 58/506
## GO:0006935 GO:0006935 chemotaxis 58/506
## GO:0042330 GO:0042330 taxis 58/506
## GO:0060326 GO:0060326 cell chemotaxis 50/506
## GO:0050867 GO:0050867 positive regulation of cell activation 56/506
## GO:0050727 GO:0050727 regulation of inflammatory response 53/506
## BgRatio pvalue p.adjust qvalue
## GO:0050900 398/28891 8.051469e-36 3.637654e-32 2.117960e-32
## GO:0006935 462/28891 3.192707e-32 6.089368e-29 3.545428e-29
## GO:0042330 464/28891 4.043405e-32 6.089368e-29 3.545428e-29
## GO:0060326 331/28891 1.001735e-31 1.131459e-28 6.587724e-29
## GO:0050867 454/28891 9.996200e-31 9.032566e-28 5.259053e-28
## GO:0050727 411/28891 4.431942e-30 3.337252e-27 1.943057e-27
## geneID
## GO:0050900 Ctsg/Tnfsf11/Ccl12/Crtam/C3ar1/Cxcl13/F7/Ccl21a/Ccl4/Ednrb/Trem2/Ccl24/Vegfc/Ccr7/Cxcr1/Mmp2/Ccr2/Ccl7/Il1b/Aif1/Itgam/Tlr2/Csf1r/Ccl9/Ccl2/Sirpa/Tnfsf4/Ch25h/Slamf9/Ccl6/Fcgr3/Pycard/Ccr1/Cx3cr1/Adam8/Ccl3/Cd300a/Fcer1g/Tnfrsf18/Trem1/Jaml/Pgf/Lgals3/Gpr183/Ptafr/Pla2g7/Itga2b/Flt1/Selp/Eps8/Cyp7b1/Bst1/Thbs1/Pecam1/Ppbp/Lyve1/Ecm1/Tnf
## GO:0006935 Ctsg/Tnfsf11/Ccl12/C3ar1/Cxcl13/F7/Ccl21a/Ccl4/Ednrb/Saa3/Ccr4/Trem2/Ccl24/Vegfc/Ccr7/Cxcr1/Mmp2/Ccr2/Ccl7/Il1b/Aif1/Itgam/Csf1r/Ccl9/Ccl2/Ch25h/Nrg1/Ccr10/Slamf9/Ccl6/Lrp1/Fcgr3/Ccr1/Cx3cr1/Adam8/Ccl3/Ear2/Fcer1g/Trem1/Hmgb2/Jaml/Fn1/Pgf/Lgals3/Ear1/Gpr183/Ccrl2/Ccr5/Ptafr/Nrp2/Pla2g7/Flt1/Dock4/Cyp7b1/Bst1/Thbs1/Cxcr6/Ppbp
## GO:0042330 Ctsg/Tnfsf11/Ccl12/C3ar1/Cxcl13/F7/Ccl21a/Ccl4/Ednrb/Saa3/Ccr4/Trem2/Ccl24/Vegfc/Ccr7/Cxcr1/Mmp2/Ccr2/Ccl7/Il1b/Aif1/Itgam/Csf1r/Ccl9/Ccl2/Ch25h/Nrg1/Ccr10/Slamf9/Ccl6/Lrp1/Fcgr3/Ccr1/Cx3cr1/Adam8/Ccl3/Ear2/Fcer1g/Trem1/Hmgb2/Jaml/Fn1/Pgf/Lgals3/Ear1/Gpr183/Ccrl2/Ccr5/Ptafr/Nrp2/Pla2g7/Flt1/Dock4/Cyp7b1/Bst1/Thbs1/Cxcr6/Ppbp
## GO:0060326 Ctsg/Tnfsf11/Ccl12/C3ar1/Cxcl13/F7/Ccl21a/Ccl4/Ednrb/Saa3/Ccr4/Ccl24/Vegfc/Ccr7/Cxcr1/Mmp2/Ccr2/Ccl7/Il1b/Aif1/Itgam/Csf1r/Ccl9/Ccl2/Ch25h/Ccr10/Slamf9/Ccl6/Fcgr3/Ccr1/Cx3cr1/Adam8/Ccl3/Fcer1g/Trem1/Hmgb2/Jaml/Pgf/Lgals3/Gpr183/Ccrl2/Ccr5/Pla2g7/Flt1/Dock4/Cyp7b1/Bst1/Thbs1/Cxcr6/Ppbp
## GO:0050867 Ms4a2/Tnfsf11/Foxp3/Ccl21a/Cd160/Il10/Cd209a/Il18/Trem2/Tnfrsf4/Il1rl1/Havcr2/Ccr7/Ccr2/Cd86/Il1b/Aif1/Itgam/Csf1r/Ccl2/Il5/Sirpa/Tnfsf4/Pycard/Lilra5/Adam8/Tyrobp/Lgals1/Fcer1g/Tnfrsf13c/Cd274/Cd209d/Mef2c/Gpr183/Cdkn1a/Igf1/Actb/Clec4d/Ptafr/Zbtb16/Acta2/Nlrp3/Ighm/Il6/Axl/Cd83/Selp/Icos/Nfkbiz/Bst1/Thbs1/Cd40/Gata2/Tnf/Malt1/Ighd
## GO:0050727 Dnase1l3/Tnfsf11/Tnc/Snca/Cd200r3/Foxp3/Il10/Ednrb/Ptgis/Il18/Trem2/Il1rl1/Il22ra2/Ccl24/Ccr7/Ccr2/Il1b/Tlr2/Csf1r/Osm/Enpp3/Mefv/Sirpa/Tnfsf4/Ctss/Acp5/Fcgr3/Pycard/Ahr/Cx3cr1/Lilra5/Adam8/Ccl3/Alox5ap/Lgals1/Fcer1g/Apoe/Slc7a2/Lpl/Igf1/Ccr5/Nkg7/Nlrp3/Cd44/Il6/Fcgr1/Ier3/Nfkbiz/Fcgr2b/Bst1/Tnf/Pglyrp1/Cma1
## Count
## GO:0050900 58
## GO:0006935 58
## GO:0042330 58
## GO:0060326 50
## GO:0050867 56
## GO:0050727 53
dotplot(go_c, showCategory = 20,
title = "EnrichmentGO_BP Top20 GO pathways downregulated in KL vs. K",
label_format = 60) # 圖2a